
library(rio)
library(ggplot2)
library(texreg)
library(tidyverse)
library(AER)
library(broom)

##set the working directory for the replication archive
working_dir <- "~/Dropbox/dueling project/2nd_paper/replication archive/"

source(paste0(working_dir,"Auxiliary Scripts/RegressionRunner.R"))

se <- function(m) summary(m)$coef[,2]


##load in the main datasets
carpenter <- readRDS(paste0(working_dir,"Data/carpenter_petitions.RDS"))
gamm_putnam <- readRDS(paste0(working_dir,"Data/gamm_putnam_longrun.RDS"))
rel_growth <- readRDS(paste0(working_dir,"Data/religionEarly20thCen.RDS"))
churches1850 <- readRDS(paste0(working_dir,"Data/churches_1850.RDS"))
churches1860 <- readRDS(paste0(working_dir,"Data/churches_1860.RDS"))
pos <- readRDS(paste0(working_dir,"Data/PostOfficesByDecade_county.Rds"))
all_together <- readRDS(paste0(working_dir,"Data/turnoutMerged.RDS"))
old_news <- readRDS(paste0(working_dir,"Data/newspapers_historical.RDS"))

colnames(pos)[c(1,2)] <- c("stateabbr", "county_name")
pos$county_name <- toupper(pos$county_name)
pos <- pos[,1:4]

carpenter_with <- merge(carpenter %>% filter(countypop >= 0), 
                        pos %>% filter(decade == 1830), by = c("stateabbr", "county_name"), all.x = TRUE)

##replicate carpenter apsr
modCarp <- lm(totnamesper1000  ~ log((numPost + 1))  + factor(stateabbr) + factor(congress) , data = carpenter_with)

##Turnout regresions
turn1840 <- lm(PresTurnout ~ log(postLag1 + 1) + turnLag1 + malePop, data = all_together %>% filter(year == 1840))
turn1860 <- lm(PresTurnout ~ log(postLag1 + 1) + turnLag1 + malePop, data = all_together %>% filter(year == 1860))


##run newspaper regressions
papers1840 <- lm(log(all_papers1840 + 1)  ~ log(numPost + 1) + I(DVoteshare) +
                log(AREA_SQMI) + 
                log(foreign_born_tot_1850 + 1) + log(tot_manuf_output_1850 + 1) +
                log(capital_manuf_1850 + 1) + foreign_born_tot_pop_sh_1850 + factor(state), 
              data = churches1850 %>% filter(decade == 1830))

##run the 1850/1860 Churches Regression
carp_sum <- carpenter_with %>% 
  group_by(county_name, stateabbr) %>% 
  summarise(totnamesper1000 =  1000*mean(totnames/countypop), south = mean(state_southplusMDKYTN))
carp_sum$county_name <- tolower(carp_sum$county_name)
colnames(carp_sum)[1:2] <- c("county", "state")
  
mod1850 <- lm(log(1000*(tot_seats_churches_1850 + 1)/tot_pop_1850) ~ log((numPost + 1)) + I(DVoteshare) +
                log(all_papers1840 + 1) +
                log(tot_pop_1850) + log(AREA_SQMI) + 
                log(foreign_born_tot_1850 + 1) + log(tot_manuf_output_1850 + 1) +
                log(capital_manuf_1850 + 1) + foreign_born_tot_pop_sh_1850, 
              data = churches1850 %>% filter(decade == 1840)) #carp_sum, by = c("county", "state"), all.x = TRUE))

mod1860 <- lm(log(1000*(tot_seats_churches_1860 + 1)/tot_pop_1860) ~ log((numPost + 1)) + I(DVoteshare) +
                log(all_papers1840 + 1) +
                log(tot_pop_1860) + log(AREA_SQMI_1860) + 
                log(foreign_born_tot_1860 + 1) + log(tot_manuf_output_1860 + 1) +
                log(capital_manuf_1860 + 1) + foreign_born_tot_pop_sh_1860, 
              data = churches1860 %>% filter(decade == 1850))

##run the 20th Century Churches regression
mod1906 <- lm(log(1000*(AllAdherents1906 + 1)/(pop1906)) ~ log(numPost + 1)  + log((total_value_1890 + 1)/tot_pop_1890) + 
                factor(state), data = rel_growth)
mod1916 <- lm(log(1000*(AllAdherents1916 + 1)/(pop1916)) ~ log(numPost + 1)  + log((total_value_1890 + 1)/tot_pop_1890) + 
                factor(state), data = rel_growth)
mod1926 <- lm(log(1000*(AllAdherents1926 + 1)/(pop1926)) ~ log(numPost + 1)  + log((total_value_1890 + 1)/tot_pop_1890) + 
                factor(state), data = rel_growth)
mod1936 <- lm(log(1000*(AllAdherents1936 + 1)/(pop1936)) ~ log(numPost + 1)  + log((total_value_1890 + 1)/tot_pop_1890) + 
                factor(state), data = rel_growth)

##now we run the gamm/putnam reressions
mod_all_assn <- lm(log(1000*(n_assn)/pop) ~ log((1+numPost)/tot_pop) + I((decade - 1900)/10) + factor(state), 
                   data = gamm_putnam %>% filter(decade %in% c(1900, 1910, 1920, 1940, 1960)))

mod_womens_assn <- lm(log(1000*(n_womens + 1)/pop) ~ log((1+numPost)/tot_pop) + I((decade - 1900)/10) + factor(state), 
                      data = gamm_putnam %>% filter(decade %in% c(1900, 1910, 1920, 1940, 1960)))

mod_masons_assn <- lm(log(1000*(n_masons + n_odd + 1)/pop) ~ log((1+numPost)/tot_pop) + I((decade - 1900)/10) + factor(state), 
                      data = gamm_putnam %>% filter(decade %in% c(1900, 1910, 1920, 1940, 1960)))


##now we begin to assemble
sc_longdata <- 
  data.frame(
    "Model Name" = c("Antislavery Petition Signatures,\n1832-45",
                     "Newspapers,\n1840",
                     paste0("Voter Turnout,\n", c(1860)),
                     paste0("Church Adherents,\n", c(1850, 1860, 1906, 1916, 1926, 1936)),
                     "Total Civic Associations,\n1900-1960",
                     "Total Womens' Associations,\n1900-1960",
                     "Total Masonic & Odd Fellows\nLodges, 1900-1960"
                     ),
    # "Type" = c(rep("Churches", 6), rep("Associations", 3)),
    "estimate" = c(coef(modCarp)[2], coef(papers1840)[2],  coef(turn1860)[2],
                   coef(mod1850)[2], coef(mod1860)[2], coef(mod1906)[2], coef(mod1916)[2], coef(mod1926)[2], coef(mod1936)[2],
                    coef(mod_all_assn)[2], coef(mod_womens_assn)[2], coef(mod_masons_assn)[2]),
    "se" = c(se(modCarp)[2], se(papers1840)[2],  se(turn1860)[2],
             se(mod1850)[2], se(mod1860)[2], se(mod1906)[2], se(mod1916)[2], se(mod1926)[2], se(mod1936)[2],
             se(mod_all_assn)[2], se(mod_womens_assn)[2], se(mod_masons_assn)[2])
  )

sc_longdata$`Postal Decade Used` <- c(rep("1830", 2), "1850", "1840", "1850", rep("1890", 7))


sc_longplot <-
  sc_longdata %>% 
  mutate(Model.Name = factor(Model.Name, levels =  c("Antislavery Petition Signatures,\n1832-45",
                                                     "Newspapers,\n1840",
                                                     paste0("Voter Turnout,\n", c(1840, 1860)),
                                                     paste0("Church Adherents,\n", c(1850, 1860, 1906, 1916, 1926, 1936)),
                                                     "Total Civic Associations,\n1900-1960",
                                                     "Total Womens' Associations,\n1900-1960",
                                                     "Total Masonic & Odd Fellows\nLodges, 1900-1960"
  ))) %>%
  ggplot(., aes(reorder(Model.Name, desc(Model.Name)), estimate, colour = `Postal Decade Used`)) + 
  geom_point(size = 5, position=position_dodge(width=0.5)) +
  geom_errorbar(
    aes(ymin = estimate - 1.65*se, ymax = estimate + 1.65*se),
    width = 0.01,
    linetype = "solid",
    position=position_dodge(width=0.5)) +
  #ylim(c(0,8)) +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  ylab("Coefficient on Post Offices (logged)") +
  xlab("") +
  coord_flip() +
  theme_bw() +
  theme(legend.title = element_text(size = 25),
        axis.text.x = element_text(size = 25), axis.text.y = element_text(size = 25),
        legend.text = element_text(size = 25),
        axis.title.x = element_text(size = 25),
        axis.title.y = element_text(size = 25),
        strip.text.x = element_text(size = 25)) + 
  theme(legend.position="bottom") +
  labs(colour = "Postal Decade Used")

ggsave(sc_longplot, 
       file = paste0(working_dir,"Replication Plots/civic_orgs_and_churches.pdf"), 
       width = 14, height = 11)
